In [10]:
import pandas as pd
import numpy as np
# Setup
import os
import pandas as pd
import numpy as np
import h5py
import scipy.io
import string
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
import matplotlib.animation as animation
import matplotlib as mpl
import seaborn as sns
from scapy.all import RawPcapReader
from datetime import datetime
from pathlib import Path
from tqdm import tqdm
from sklearn.preprocessing import MinMaxScaler
from sklearn.manifold import TSNE
from sklearn.preprocessing import LabelEncoder
csv_summary = pd.read_csv('../EDA/SAIDAS/04-csv_summary.csv')
activities = {
'A': 'Push forward',
'C': 'Hands up and down',
'P': 'Reading',
'S': 'Writing'
}
csv_summary = csv_summary.loc[
(csv_summary['test'] == 'fine_grained')
& (csv_summary['slot'] == 'Test')
& (csv_summary['label'].isin(activities.keys()))]
csv_summary['activity'] = csv_summary['label'].map(activities)
In [11]:
def get_subset(csv_summary,
monitors=['m1', 'm2', 'm3'],
environments=['Classroom', 'Office'],
labels=['A', 'C', 'P', 'S'],
samplesize=None,
random_state=42):
subset = csv_summary.loc[
(csv_summary['test'] == 'fine_grained')
& (csv_summary['slot'] == 'Test')
& (csv_summary['monitor'].isin(monitors))
& (csv_summary['environment'].isin(environments))
& (csv_summary['label'].isin(labels))
]
if samplesize is not None:
subset = subset.sample(n=samplesize, random_state=random_state)
subset = subset.reset_index(drop=True)
return subset
get_subset(csv_summary, monitors=['m1'], environments=['Classroom'], samplesize=10)
Out[11]:
| test | environment | monitor | slot | csv_file_path | filename | label | nr_packets | nr_subcarriers | set | activity | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | fine_grained | Classroom | m1 | Test | ../Data/fine_grained/Classroom/80MHz/3mo/m1/Sl... | C_batch/batch_535.mat | C | 50.0 | 242.0 | test | Hands up and down |
| 1 | fine_grained | Classroom | m1 | Test | ../Data/fine_grained/Classroom/80MHz/3mo/m1/Sl... | S_batch/batch_606.mat | S | 50.0 | 242.0 | test | Writing |
| 2 | fine_grained | Classroom | m1 | Test | ../Data/fine_grained/Classroom/80MHz/3mo/m1/Sl... | C_batch/batch_770.mat | C | 50.0 | 242.0 | test | Hands up and down |
| 3 | fine_grained | Classroom | m1 | Test | ../Data/fine_grained/Classroom/80MHz/3mo/m1/Sl... | C_batch/batch_1054.mat | C | 50.0 | 242.0 | test | Hands up and down |
| 4 | fine_grained | Classroom | m1 | Test | ../Data/fine_grained/Classroom/80MHz/3mo/m1/Sl... | A_batch/batch_545.mat | A | 50.0 | 242.0 | test | Push forward |
| 5 | fine_grained | Classroom | m1 | Test | ../Data/fine_grained/Classroom/80MHz/3mo/m1/Sl... | S_batch/batch_109.mat | S | 50.0 | 242.0 | test | Writing |
| 6 | fine_grained | Classroom | m1 | Test | ../Data/fine_grained/Classroom/80MHz/3mo/m1/Sl... | A_batch/batch_396.mat | A | 50.0 | 242.0 | test | Push forward |
| 7 | fine_grained | Classroom | m1 | Test | ../Data/fine_grained/Classroom/80MHz/3mo/m1/Sl... | P_batch/batch_646.mat | P | 50.0 | 242.0 | test | Reading |
| 8 | fine_grained | Classroom | m1 | Test | ../Data/fine_grained/Classroom/80MHz/3mo/m1/Sl... | A_batch/batch_134.mat | A | 50.0 | 242.0 | test | Push forward |
| 9 | fine_grained | Classroom | m1 | Test | ../Data/fine_grained/Classroom/80MHz/3mo/m1/Sl... | S_batch/batch_326.mat | S | 50.0 | 242.0 | test | Writing |
In [51]:
def plot_tsne(subset, title='', output_file=f'SAIDAS/08-tsne-output.png', perplexity=30):
X = np.zeros((len(subset), 50 * 242 * 2))
for i in range(len(subset)):
csv_path = subset.iloc[i]['csv_file_path']
sample_file_path = subset.iloc[i]['filename']
base_folder = os.path.dirname(csv_path)
sample_file_path = os.path.join(base_folder, sample_file_path)
data = scipy.io.loadmat(sample_file_path)['csi_mon']
real_part = np.real(data)
imag_part = np.imag(data)
X[i] = np.concatenate((real_part.flatten(), imag_part.flatten()))
X = X.astype(np.float32)
labels = subset['activity'].tolist()
label_encoder = LabelEncoder()
numeric_labels = label_encoder.fit_transform(labels)
class_names = label_encoder.classes_
colormap = 'tab10'
cmap = plt.get_cmap(colormap, len(class_names))
tsne = TSNE(n_components=2, perplexity=perplexity, random_state=42)
X_embedded = tsne.fit_transform(X)
x, y = X_embedded[:, 0], X_embedded[:, 1]
fig, ax = plt.subplots(1, 2, figsize=(14, 6))
ax[0].scatter(x, y, c=numeric_labels, cmap=cmap, s=5)
ax[0].set_title(title)
ax[0].set_xlabel('t-SNE Component 1')
ax[0].set_ylabel('t-SNE Component 2')
ax[0].grid(True)
# Polar
r = np.sqrt(x**2 + y**2)
theta = np.arctan2(y, x)
ax[1].scatter(theta, r, c=numeric_labels, cmap=colormap, s=5)
ax[1].set_title(f'{title} (polar)')
ax[1].set_xlabel('radius')
ax[1].set_ylabel('theta')
plt.tight_layout()
legend_handles = []
for i, class_name in enumerate(class_names):
# Use the same cmap to get the color for each class
color = cmap(i)
legend_handles.append(plt.Line2D([0], [0], marker='o', color='w', label=class_name,
markersize=10, markerfacecolor=color))
plt.legend(handles=legend_handles, loc='upper right', bbox_to_anchor=(1.15, 1), fontsize=8)
plt.savefig(output_file, dpi=300, bbox_inches='tight')
plt.show()
In [54]:
subset = get_subset(csv_summary, monitors=['m1'], environments=['Classroom'])
plot_tsne(subset, title='Classroom - Monitor 1', output_file='SAIDAS/08-tsne-classroom-m1.png', perplexity=50)
In [55]:
subset = get_subset(csv_summary, environments=['Classroom'])
plot_tsne(subset, title='Classroom - Monitors 1, 2 and 3', output_file='SAIDAS/08-tsne-classroom-m1m2m3.png', perplexity=100)
In [56]:
subset = get_subset(csv_summary)
plot_tsne(subset, title='Classroom and Office - Monitors 1, 2 and 3', output_file='SAIDAS/08-tsne-classroom_office-m1m2m3.png', perplexity=100)
In [57]:
subset = get_subset(csv_summary, samplesize=5000)
plot_tsne(subset, title='Classroom and Office - Monitors 1, 2 and 3', output_file='SAIDAS/08-tsne-classroom_office-m1m2m3-5000.png', perplexity=100)